# -*- coding: utf-8 -*-
# @Time    : 2023/9/25 14:28
# @Author  : 王凯
# @File    : congqing_grade.py
# @Project : spider-man
import ast
import re

import scrapy

from apps.creadit_grade_a.creadit_grade_a.items import NetCreditGradeAItem


class CongQingGradeSpider(scrapy.Spider):
    """Spider that collects taxpayer credit-grade records for Chongqing.

    The records are not scraped from HTML: they are read from a JavaScript
    data file that assigns a nested list literal to ``var data``. Every
    record in that list becomes one ``NetCreditGradeAItem`` tagged with this
    spider's ``province``.
    """

    name = "congqing_grade"
    province = "重庆"
    url = "http://chongqing.chinatax.gov.cn/xxgkxt/pages/ajnsrmd/ajnsr.html"
    # Request classes are exposed as class attributes so they can be swapped
    # without touching the request-building code below.
    Request = scrapy.Request
    FormRequest = scrapy.FormRequest

    # Captures everything from the opening "[" of the ``var data`` assignment
    # to the end of the response body. Compiled once at class creation rather
    # than on every callback invocation.
    _DATA_PATTERN = re.compile(r"var data = (\[.*)", re.S)

    def start_requests(self):
        """Yield the single request for the JavaScript data file."""
        # NOTE(review): the data file name is pinned to 2022 — confirm whether
        # other years are published under sibling file names.
        url = "http://chongqing.chinatax.gov.cn/xxgkxt/pages/ajnsrmd/js/data2022.js?v=0.0.2"
        yield self.Request(url, callback=self.parse_detail)

    def parse_detail(self, response, **kwargs):
        """Parse the JavaScript data file and yield one item per record.

        Args:
            response: Response whose text is expected to contain
                ``var data = [...]``.
            **kwargs: Accepted for callback-signature compatibility; unused.

        Yields:
            NetCreditGradeAItem: One item for each record dict found in the
            nested list.

        Raises:
            ValueError, SyntaxError: If the captured payload is not a plain
                literal that ``ast.literal_eval`` can parse.
        """
        match = self._DATA_PATTERN.search(response.text)
        if match is None:
            # Previously this surfaced as a bare IndexError; report the real
            # cause (unexpected response content) and stop cleanly instead.
            self.logger.error(
                "No 'var data = [...]' payload found in response from %s",
                response.url,
            )
            return

        # SECURITY: the payload is fetched over plain HTTP, so it must never be
        # passed to eval(). ast.literal_eval accepts only literals (lists,
        # dicts, strings, numbers, ...) and therefore cannot execute code
        # injected into the response.
        data_list = ast.literal_eval(match.group(1))

        # The payload is a list of groups, each group being a list of records.
        for datas in data_list:
            for data in datas:
                # NOTE(review): assumes NetCreditGradeAItem supports attribute
                # assignment (a stock scrapy.Item does not) — confirm against
                # the item definition.
                item = NetCreditGradeAItem()
                item.taxpayer_id = data.get("nsrsbh")
                item.company_name = data.get("nsrmc")
                item.year = data.get("pjnd")
                item.province = self.province
                yield item


def run():
    """Start the ``congqing_grade`` spider through Scrapy's command line."""
    # Imported lazily so that merely importing this module does not load
    # Scrapy's command-line machinery.
    from scrapy.cmdline import execute

    argv = "scrapy crawl congqing_grade".split()
    execute(argv)


# Allow running this file directly as a script to launch the crawl.
if __name__ == "__main__":
    run()
